In [26]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import ipywidgets as widgets
from ipywidgets import interact

# Plotly plotting support
import plotly.offline as py
py.init_notebook_mode()
import plotly.graph_objs as go
import plotly.figure_factory as ff
import cufflinks as cf
cf.set_config_file(offline=True, world_readable=True, theme='ggplot')

# this file joins the MCO shipping sorter scanner message with the packorder information and UPC
# our goal is to figure out for each UPC, what is the typical boxsize people use

# load the mco shipping sorter scanner message and the packorder info
mco_scan = pd.read_csv('MCO_Scan.csv')
wms_packorder = pd.read_csv('wms_packorderid.csv')

# load stockitem
stock_item = pd.read_csv('wms_stockitem.csv')

# change the name
mco_scan = mco_scan.rename(index=str, columns={"Shipping Unit Barcode": "SHIPPINGLUBARCODE"})

# convert to floats
mco_scan['SHIPPINGLUBARCODE'] = pd.to_numeric(mco_scan['SHIPPINGLUBARCODE'], errors='coerce')
wms_packorder['SHIPPINGLUBARCODE'] = pd.to_numeric(wms_packorder['SHIPPINGLUBARCODE'], errors='coerce')

# merge the two table together on shippinglubarcode
packorder_boxsize = pd.merge(mco_scan, wms_packorder, on='SHIPPINGLUBARCODE', how='inner')

# delete the unnecessary cols
del packorder_boxsize['Scanner Id']
del packorder_boxsize['Disposition Code']
del packorder_boxsize['ID']
del packorder_boxsize['CREATED']

# remove the duplicates from wms_stockitem
stock_item = stock_item.drop_duplicates(subset=['PACKORDERID'])

# join the two table
upc_boxsize = pd.merge(packorder_boxsize, stock_item, on='PACKORDERID', how='inner')

# delete cols
del upc_boxsize['Unnamed: 0']
del upc_boxsize['ID']
del upc_boxsize['CREATED']

# change the name
upc_boxsize = upc_boxsize.rename(index=str, columns={"MATERIALUPC": "UPC"})

from scipy import stats # will use this library to help find the mode

# group by upc
group_upc = upc_boxsize.groupby('UPC')

# define the cols
upc = []
size = []

# find the upc and size of pack
for name,group in group_upc:
    # put all the upc's in the list
    upc.append(name)
    # get all the packaging barcode as a list
    boxsize = group['Packaging Barcode'].tolist()
    # find the mode
    m = stats.mode(boxsize)
    size.append(m[0][0])

# create a dataframe with upc and boxsize
find_boxsize = {'UPC': upc, 'Packaging Barcode': size}
find_boxsize_by_upc = pd.DataFrame(data=find_boxsize)

#find_boxsize_by_upc.to_csv('find_boxsize_by_upc.csv')
/Users/david.liu/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning:

Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.

/Users/david.liu/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/merge.py:962: UserWarning:

You are merging on int and float columns where the float values are not equal to their int representation

In [51]:
 
Out[51]:
UPC Packaging Barcode
0 56100008 9006004000
1 77295000 1108005250
2 77295037 2301501000
3 77400006 9006004000
4 77400022 9006004000
5 77400074 9006004000
6 77400580 9006004000
7 77421302 9006004000
8 77431653 9006004000
9 77431777 1108005250
10 77431779 9006004000
11 77460187 9006004000
12 83460030 --
13 94304831 9006004000
14 94307810 9006004000
15 94670150 1309006250
16 94670157 1309006250
17 99915000 1501201100
18 99918510 2301901600
19 810000413 9006004000
20 810000701 9006004000
21 810000725 9006004000
22 810000795 9006004000
23 810000927 9006004000
24 834674050 1108005250
25 834674053 1108005250
26 834678005 1108005250
27 834678023 1108005250
28 852142827 2201601220
29 910038130 9006004000
... ... ...
8786 849101053955 1108005250
8787 849101053986 1701305000
8788 852661007283 9006004000
8789 852661007290 9006004000
8790 853084004408 2301901600
8791 853881004090 9006004000
8792 853881004625 1309006250
8793 858639005381 9006004000
8794 871180830503 1108005250
8795 871180880852 1701305000
8796 871896985396 1701305000
8797 880954550003 9006004000
8798 889526077419 1501201100
8799 890297100403 1401007500
8800 890306200824 1401007500
8801 890306201139 1309006250
8802 890324300855 2601905250
8803 890324301603 1401007500
8804 890324301612 1701305000
8805 890324302023 1108005250
8806 890324302606 1108005250
8807 890324302689 1108005250
8808 890324302731 1108005250
8809 890605682067 1401007500
8810 893610647074 1701305000
8811 931274230605 1108005250
8812 932110401146 1108005250
8813 932110481170 1108005250
8814 932110486070 1108005250
8815 978073763139 1401007500

8816 rows × 2 columns